library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)


options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Load data. Note that this code uses social distancing data from the most recent update in its graphs, while my demographics summary is only updated with data through 5/10/2020. If you want to adjust that, you could remove the lines of code where I take out the social distancing data from sj_socialdistancing_demdata_prepostdifs_manyvars.rds and just use that data.

# load data
sj_dem_by_block <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepostdifs_manyvars.rds") %>% dplyr::select(-c(`% not completely at home`, `% Completely at Home`, `% not completely at home pre shelter`, `% Completely at Home Pre Shelter`))

# load the social distancing data
# get San Jose block groups
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)

# Use tracts sent to us by San Jose
sj_tracts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
  st_as_sf() %>%
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID):    2227
## proj4string:    +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
sj_citycouncil_disticts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
  st_as_sf() %>%
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID):    2227
## proj4string:    +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
# from code written by others to get SJ blockgroups
sj_blockgroups <-
  scc_blockgroups %>%
  st_centroid() %>%
  st_join(sj_tracts, left = F) %>%
  st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
  mutate(
    DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
  ) %>%
  st_set_geometry(NULL) %>%
  left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
  st_as_sf() %>%
  dplyr::select(GEOID, DISTRICTS)

# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9

# code from others in the class to get social distancing data 
sj_socialdistancing <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_socialdistancing.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date()) %>% 
  left_join(sj_blockgroups, by = c("origin_census_block_group" = "GEOID")) %>% 
  filter(!is.na(DISTRICTS))

# obtaining weekends
weekends <-
  sj_socialdistancing %>% 
  filter(!duplicated(date)) %>% 
  arrange(date) %>% 
  mutate(
    weekend = 
      ifelse(
        (date %>% as.numeric()) %% 7 %in% c(2,3),
        T,
        F
      )
  ) %>% 
  dplyr::select(date,weekend)

sj_socialdistancing <- 
  sj_socialdistancing %>% 
  left_join(weekends)

# date of the shelter in place order
shelter_start <- "2020-03-16" %>% as.Date()

# get average staying at home on weekdays in January and February, and add trendlines and add column indicating before or after shelter in place
sj_pre_sd_at_home_average <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date <  as.Date("2020-03-01")) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% leaving home` = (100 - `% Completely at Home`)) %>%
  left_join(sj_dem_by_block, by = c("origin_census_block_group" = "blockgroup"))

sj_pre_sd_at_home_average[is.na(sj_pre_sd_at_home_average)] <- 0

sj_pre_sd_at_home_average <- sj_pre_sd_at_home_average %>%
  mutate(
    ami_trendline =
      fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% over 125,000`)),
    young_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent 20-29`)),
    elderly_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent elderly`)), 
    hisp_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% hispanic/latino`)),
    english_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% speaking english > well`)),
    asian_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`% Asian`)),
    educ_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent associates or higher`)),
    internet_trendline = fitted(lm(sj_pre_sd_at_home_average$`% leaving home` ~ sj_pre_sd_at_home_average$`percent high speed`))) %>%
  cbind(`Before or After Shelter-in-Place` = "Before shelter-in-place")

# same for since shelter in place
sj_post_sd_at_home_average <- sj_socialdistancing %>% 
  filter(weekend == F) %>% 
  filter(date >  shelter_start) %>%
  group_by(origin_census_block_group) %>% 
  summarize(completely_home_device_count = sum(completely_home_device_count), device_count = sum(device_count)) %>% 
  mutate(`% Completely at Home` = (completely_home_device_count/device_count*100) %>% round(1), `% leaving home` = (100 - `% Completely at Home`)) %>%
  left_join(sj_dem_by_block, by = c("origin_census_block_group" = "blockgroup"))

sj_post_sd_at_home_average[is.na(sj_post_sd_at_home_average)] <- 0

sj_post_sd_at_home_average <- sj_post_sd_at_home_average %>%
  mutate(
    ami_trendline =
      fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% over 125,000`)),
    young_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent 20-29`)),
    elderly_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent elderly`)), 
    hisp_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% hispanic/latino`)),
    english_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% speaking english > well`)),
    asian_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`% Asian`)),
    educ_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent associates or higher`)),
    internet_trendline = fitted(lm(sj_post_sd_at_home_average$`% leaving home` ~ sj_post_sd_at_home_average$`percent high speed`))) %>%
  cbind(`Before or After Shelter-in-Place` = "After shelter-in-place")

sj_dem_distancing_combined <- rbind(sj_pre_sd_at_home_average, sj_post_sd_at_home_average)

# convert the before/after column to factor
sj_dem_distancing_combined$`Before or After Shelter-in-Place` <- factor(sj_dem_distancing_combined$`Before or After Shelter-in-Place`, levels = c("Before shelter-in-place", "After shelter-in-place"))

saveRDS(sj_dem_distancing_combined, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/sj_socialdistancing_demdata_prepost_v2.rds")

Now make plots.

AMI

fig_ami <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`% over 125,000`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`% over 125,000`,
      y = ~ami_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of households making over $125,000'), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_ami

Young adults

fig_young <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`percent 20-29`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`percent 20-29`,
      y = ~young_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of residents ages 20-29', autorange = "reversed"), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_young

Elderly population

fig_elderly <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`percent elderly`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`percent elderly`,
      y = ~elderly_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% elderly'), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_elderly

Hispanic/Latino population

fig_hisp <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`% hispanic/latino`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`% hispanic/latino`,
      y = ~hisp_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of residents that are Hispanic/Latino', autorange = "reversed"), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_hisp

English language ability

fig_english <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`% speaking english > well`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`% speaking english > well`,
      y = ~english_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of residents speaking English well'), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_english

Asian population

fig_asian <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`% Asian`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`% Asian`,
      y = ~asian_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of residents that are Asian'), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_asian

Education

fig_educ <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`percent associates or higher`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`percent associates or higher`,
      y = ~educ_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of residents with a degree at the Associate\'s level or higher'), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_educ

High speed internet access

fig_internet <- 
  plot_ly (sj_dem_distancing_combined) %>%
    add_trace(
      x = ~`percent high speed`, 
      y = ~`% leaving home`, 
      frame = ~`Before or After Shelter-in-Place`, 
      type = 'scatter', 
      mode = 'markers', 
      showlegend = F
    ) %>% 
    add_trace(
      x = ~`percent high speed`,
      y = ~internet_trendline,
      type = 'scatter',
      mode = 'lines',
      line = list(size = 5, color = 'rgba(255, 165, 0, 1)'),
      frame = ~`Before or After Shelter-in-Place`,
      showlegend = F
    ) %>% 
  animation_button(visible = F) %>%
  animation_slider(
    pad = list(t =75),
    currentvalue = list(visible=F)
  ) %>%
  layout(xaxis = list(title = '% of household with high speed internet access'), yaxis = list(title = '% leaving home'), margin = list(l = 75,r = 75))

fig_internet